library(vegan) envdata<-read.csv('env.csv', header=TRUE, row.names=1) #perform PCA using prcomp NOTE HERE: default for scale is FALSE, need to make scale=TRUE to use correlation matrix (correlation maxtrix NEEDS to be used if your variables are in different scales) env.pca<-prcomp(envdata,scale=TRUE) #go through and interpret the results from the PCA summary(env.pca) #note that this analysis presents SDs and NOT the variances (i.e. eigenvalues) - these need to be squared to become eigenvalues, so let's look at the the SD squared next env.eig<-env.pca$sdev^2 #look at it env.eig #or you can calculate eigenvalues using pca.eigenval pca.eigenval(env.pca) #screeplot where each eigenvalue for each successive PC is depicted, overlay with a null distribution derived from the broken-stick model screeplot(env.pca, bstick=TRUE) #if you look at this plot, the first 2 PC are not higher than the broken stick model, but our notes say that this large amount of variance (46%) is still ecologically sig #alternatively, test stat significance of first several PCA using a Monte Carlo (randomization) test ordi.monte(envdata,ord='pca',dim=5) #here you get a frequence distribution of eigenvalues of randomized datasets compared to the real eigenvalues (so it's good if the real eigenvalue is lower than all the random ones) it also gives you p-values for each PC #NOW we can examine the eigenvectors (i.e. variable loadings) only the first 5 are shown env.pca$rotation #note: here strong correlations (either pos or neg) indicate a high contribution toward the linear combination comprising each PCA #or you can calculate eigenvectors using pca.eigenvec (cutoff is used to show only larger values, dim means only first 3 compontents are shown) pca.eigenvec(env.pca,dim=5,digits=3,cutoff=.1) #convert the eigenvector coefficients into simple correlation coefficients (i.e. Pearson product-moment correlations). The structure coefficients (loadings) are linear correlations between original vriable and pc scores pca.structure(env.pca,envdata,dim=5,cutoff=0.4) #NOW look at sample scores and ordination plots env.scores<-env.pca$x env.scores #NOW visualize the ordination using biplot() or ordiplot() #easy.. biplot(env.pca) #more flexible and preferred approach, here choices are axes shown, type is the type of graph which may be points, text or none, display only sites or speces, the default for display is to show both, xlim and ylim (min,max) of the plot, then labels is optional text used for labels ordiplot(env.pca,choices=c(1,2),type='text',display='sites',xlab="PC 1 (27%)", ylab="PC 2 (18%)") #next add the eigenvectors (i.e. variable loadings) #first add the arrows (multiply them by 5 so you can see them) arrows(0,0,env.pca$rotation[,1]*5,env.pca$rotation[,2]*5,col='purple') #then we added the text labels text(env.pca$rotation[,1]*5.2,env.pca$rotation[,2]*5.2,row.names(env.pca$rotation)) #Exercise: #use the env data to generate a covariance matrix based PCA envcovar.pca<-prcomp(envdata,scale=FALSE) #go through and interpret the results from the PCA summary(envcovar.pca) #note that this analysis presents SDs and NOT the variances (i.e. eigenvalues) - these need to be squared to become eigenvalues, so let's look at the the SD squared next env.eig<-envcovar.pca$sdev^2 #look at it env.eig #or you can calculate eigenvalues using pca.eigenval pca.eigenval(envcovar.pca) #assess using broken stick screeplot(envcovar.pca, bstick=TRUE) #NOW we can examine the eigenvectors (i.e. variable loadings) only the first 5 are shown envcovar.pca$rotation pca.eigenvec(envcovar.pca,dim=5,digits=3,cutoff=.1) #convert the eigenvector coefficients into simple correlation coefficients (i.e. Pearson product-moment correlations). The structure coefficients (loadings) are linear correlations between original vriable and pc scores pca.structure(envcovar.pca,envdata,dim=5,cutoff=0.4) #perform PCR on species abundance speabu<-read.csv('abund.csv', header=TRUE,row.names=1) speabu.pca<-prcomp(speabu[,-1],scale=FALSE) summary(speabu.pca) pca.eigenval(speabu.pca) #assess using broken stick screeplot(speabu.pca, bstick=TRUE) biplot(speabu.pca) ordiplot(speabu.pca,choices=c(1,2),type='text',display='sites',xlab="PC 1", ylab="PC 2") #next add the eigenvectors (i.e. variable loadings) #first add the arrows (multiply them by 5 so you can see them) arrows(0,0,speabu.pca$rotation[,1]*15,env.pca$rotation[,2]*15,col='purple') #then we added the text labels text(env.pca$rotation[,1]*5.2,speabu.pca$rotation[,2]*5.2,row.names(env.pca$rotation)) #just wanted to go back and look at the biplot using the covariance. it's quite obvious that elevation is really skewing this guy. explains 99% of variance here and look like only driver. biplot(envcovar.pca) pca.eigenval(envcovar.pca) pca.eigenvec(envcovar.pca)